Conducting further analysis on the cleaned up data from the green-up-project.qmd file.
Regions
current focus
Companies
after 2019 not included in the SURS data
Per region and per company: number of companies; type of waste per company and how many different types. Node = a company in a region (a point on a map); can't just focus on the headquarters (HQ) of the node. Try to figure out the connection between operator, collector and storage. Descriptive statistics for all years.
Setup
# Setup ----
# Packages attached by this analysis.
packages <- c(
  "tidyverse", "scales", "viridis", "ggplot2", "gganimate", "ggridges",
  "plotly", "htmlwidgets", "reshape2", "skimr", "echarts4r"
)

# Attach `pkg`, installing it first (with dependencies) when it is missing.
#
# pkg: package name as a single character string.
# Returns `pkg` invisibly; errors if the package still cannot be attached.
install_and_load <- function(pkg) {
  # requireNamespace() only checks availability; library() below does the
  # attaching, so the package is attached on both the "already installed" and
  # the "just installed" path.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
  library(pkg, character.only = TRUE)
  invisible(pkg)
}

# echarts4r.maps is installed from GitHub (via remotes) when it is missing.
if (!requireNamespace("echarts4r.maps", quietly = TRUE)) {
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
  }
  remotes::install_github("JohnCoene/echarts4r.maps")
}
# Attach it when available; stay best-effort (warn, do not stop) otherwise.
if (requireNamespace("echarts4r.maps", quietly = TRUE)) {
  library("echarts4r.maps", character.only = TRUE)
} else {
  warning("Package 'echarts4r.maps' could not be installed.", call. = FALSE)
}

# Set the working directory to the project folder.
# NOTE(review): this is a machine-specific absolute path; relative paths used
# later in the document presumably depend on it. It is only applied when the
# folder exists so the document can also be rendered from the project root on
# other machines.
project_dir <- "C:/Users/kovac/Desktop/Work/Green UP Project/green-up-project"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  warning(
    "Project folder not found, keeping working directory: ", getwd(),
    call. = FALSE
  )
}

# Apply the function to each package in the list (result list not printed).
invisible(lapply(packages, install_and_load))
# List of municipalities for each region ----
# Each vector holds the municipality names (including spelling variants and
# bilingual forms) assigned to one statistical region.
POMURSKA <- c(
  "APAČE", "BELTINCI", "CANKOVA", "ČRENŠOVCI", "DOBROVNIK", "DOBRONAK",
  "GORNJA RADGONA", "GORNJI PETROVCI", "GRAD", "HODOŠ", "KOBILJE", "KRIŽEVCI",
  "KUZMA", "LENDAVA", "LENDAVA - LENDVA", "LJUTOMER", "MORAVSKE TOPLICE",
  "MURSKA SOBOTA", "ODRANCI", "PUCONCI", "RADENCI", "RAZKRIŽJE", "ROGAŠOVCI",
  "SVETI JURIJ OB ŠČAVNICI", "ŠALOVCI", "TIŠINA", "TURNIŠČE", "VELIKA POLANA",
  "VERŽEJ"
)

PODRAVSKA <- c(
  "BENEDIKT", "CERKVENJAK", "CIRKULANE", "DESTRNIK", "DORNAVA", "DUPLEK",
  "GORIŠNICA", "HAJDINA", "HOČE-SLIVNICA", "JURŠINCI", "KIDRIČEVO", "KUNGOTA",
  "LENART", "LOVRENC NA POHORJU", "MAJŠPERK", "MAKOLE", "MARIBOR", "MARKOVCI",
  "MIKLAVŽ NA DRAVSKEM POLJU", "OPLOTNICA", "ORMOŽ", "PESNICA", "PODLEHNIK",
  "POLJČANE", "PTUJ", "RAČE-FRAM", "RUŠE", "SELNICA OB DRAVI",
  "SLOVENSKA BISTRICA", "SREDIŠČE OB DRAVI", "STARŠE", "SVETA ANA",
  "SVETA TROJICA V SLOV. GORICAH", "SVETA TROJICA V SLOVENSKIH GORICAH",
  "SV. TROJICA V SLOV. GORICAH", "SVETI ANDRAŽ V SLOV. GORICAH",
  "SVETI JURIJ V SLOV. GORICAH", "SVETI JURIJ V SLOVENSKIH GORICAH",
  "SVETI TOMAŽ", "ŠENTILJ", "TRNOVSKA VAS", "VIDEM", "ZAVRČ", "ŽETALE"
)

# NOTE(review): "PROŠKEM" does not look like a municipality name; it may be a
# truncated spelling that occurs in the raw data - confirm before removing.
KOROŠKA <- c(
  "ČRNA NA KOROŠKEM", "DRAVOGRAD", "MEŽICA", "MISLINJA", "MUTA", "PODVELKA",
  "PREVALJE", "RADLJE OB DRAVI", "PROŠKEM", "RAVNE NA KOROŠKEM",
  "RIBNICA NA POHORJU", "SLOVENJ GRADEC", "VUZENICA"
)

SAVINJSKA <- c(
  "BRASLOVČE", "CELJE", "DOBJE", "DOBRNA", "GORNJI GRAD", "KOZJE", "LAŠKO",
  "LJUBNO", "LUČE", "MOZIRJE", "NAZARJE", "PODČETRTEK", "POLZELA", "PREBOLD",
  "REČICA OB SAVINJI", "ROGAŠKA SLATINA", "ROGATEC", "SLOVENSKE KONJICE",
  "SOLČAVA", "ŠENTJUR", "ŠMARJE PRI JELŠAH", "ŠMARTNO OB PAKI", "ŠOŠTANJ",
  "ŠTORE", "TABOR", "VELENJE", "VITANJE", "VOJNIK", "VRANSKO", "ZREČE", "ŽALEC"
)

ZASAVSKA <- c("HRASTNIK", "LITIJA", "TRBOVLJE", "ZAGORJE OB SAVI")

POSAVSKA <- c(
  "BISTRICA OB SOTLI", "BREŽICE", "KOSTANJEVICA NA KRKI", "KRŠKO", "RADEČE",
  "SEVNICA"
)

JUGOVZHODNA.SLOVENIJA <- c(
  "ČRNOMELJ", "DOLENJSKE TOPLICE", "KOČEVJE", "KOSTEL", "LOŠKI POTOK",
  "METLIKA", "MIRNA", "MIRNA PEČ", "MOKRONOG-TREBELNO", "NOVO MESTO",
  "OSILNICA", "RIBNICA", "SEMIČ", "SODRAŽICA", "STRAŽA", "ŠENTJERNEJ",
  "ŠENTRUPERT", "ŠKOCJAN", "ŠMARJEŠKE TOPLICE", "TREBNJE", "ŽUŽEMBERK"
)

OSREDNJESLOVENSKA <- c(
  "BOROVNICA", "BREZOVICA", "DOBREPOLJE", "DOBROVA-POLHOV GRADEC",
  "DOL PRI LJUBLJANI", "DOMŽALE", "GROSUPLJE", "HORJUL", "IG",
  "IVANČNA GORICA", "KAMNIK", "KOMENDA", "LJUBLJANA", "LOG-DRAGOMER",
  "LOGATEC", "LUKOVICA", "MEDVODE", "MENGEŠ", "MORAVČE", "ŠKOFLJICA",
  "ŠMARTNO PRI LITIJI", "TRZIN", "VELIKE LAŠČE", "VODICE", "VRHNIKA"
)

GORENJSKA <- c(
  "BLED", "BOHINJ", "BOHINJSKA BISTRICA", "CERKLJE NA GORENJSKEM",
  "GORENJA VAS-POLJANE", "GORENJA VAS", "GORJE", "JESENICE", "JEZERSKO",
  "KRANJ", "KRANJSKA GORA", "NAKLO", "PREDDVOR", "RADOVLJICA", "ŠENČUR",
  "ŠKOFJA LOKA", "TRŽIČ", "ŽELEZNIKI", "ŽIRI", "ŽIROVNICA", "GOZD MARTULJEK"
)

PRIMORSKONOTRANJSKA <- c(
  "BLOKE", "CERKNICA", "ILIRSKA BISTRICA", "LOŠKA DOLINA", "PIVKA", "POSTOJNA",
  "NOVA VAS"
)

GORIŠKA <- c(
  "AJDOVŠČINA", "BOVEC", "BRDA", "CERKNO", "IDRIJA", "KANAL", "KOBARID",
  "MIREN-KOSTANJEVICA", "NOVA GORICA", "RENČE-VOGRSKO", "ŠEMPETER-VRTOJBA",
  "TOLMIN", "VIPAVA"
)

OBALNOKRAŠKA <- c(
  "ANKARAN", "DIVAČA", "HRPELJE-KOZINA", "IZOLA", "IZOLA - ISOLA", "KOMEN",
  "KOPER", "KOPER - CAPODISTRIA", "PIRAN", "PIRAN - PIRANO", "SEŽANA"
)

# Placeholder "municipalities" that map to a region of the same name.
NEOPREDELJENO <- "NEOPREDELJENO"
UNDEFINED <- "UNDEFINED"

# Create a data frame with all municipality-region mappings.
# The region label of every row is derived from the list names and element
# lengths, so a municipality vector and its label cannot drift apart.
region_municipalities <- list(
  "POMURSKA" = POMURSKA,
  "PODRAVSKA" = PODRAVSKA,
  "KOROŠKA" = KOROŠKA,
  "SAVINJSKA" = SAVINJSKA,
  "ZASAVSKA" = ZASAVSKA,
  "POSAVSKA" = POSAVSKA,
  "JUGOVZHODNA SLOVENIJA" = JUGOVZHODNA.SLOVENIJA,
  "OSREDNJESLOVENSKA" = OSREDNJESLOVENSKA,
  "GORENJSKA" = GORENJSKA,
  "PRIMORSKONOTRANJSKA" = PRIMORSKONOTRANJSKA,
  "GORIŠKA" = GORIŠKA,
  "OBALNOKRAŠKA" = OBALNOKRAŠKA,
  "NEOPREDELJENO" = NEOPREDELJENO,
  "UNDEFINED" = UNDEFINED
)

municipality_region_mapping <- data.frame(
  name_of_municipality = unlist(region_municipalities, use.names = FALSE),
  statistical_region = rep(
    names(region_municipalities),
    lengths(region_municipalities)
  )
)

# Function to assign clear names to waste types.
#
# name: character vector of raw waste type names.
# Returns a character vector of the same length; names without a mapping are
# returned unchanged.
clear_waste_name <- function(name) {
  case_when(
    # Sawdust, chips, cuttings, wood, particle board and veneer,
    # not specified in 03 01 04
    name == "Sawdust/Chips/Wood, not specified in 03 01 04" ~
      "Sawdust and Wood Cuttings",
    name == "Paper and cardboard packaging and cardboard packaging" ~
      "Paper and Cardboard Packaging",
    name == "Wooden packaging" ~ "Wooden Packaging",
    name == "Wood" ~ "Wood",
    name == "Wood, not specified in 20 01 37" ~ "Other Wood Waste",
    name == "Waste bark and cork" ~ "Bark and Cork",
    name == "Waste not specified elsewhere" ~ "Unspecified Wood Waste",
    TRUE ~ name
  )
}

# Functions ----
# Summarize the waste stored at the start and at the end of the year per year,
# region and waste type.
#
# data: data frame with columns year, statistical_region, type_of_waste,
#   waste_stored_start_year and waste_stored_end_year.
# Returns an ungrouped data frame with one row per year, region and waste type;
# both sums ignore NA and are divided by 1000 (presumably kg to tons - confirm
# against the source data units).
summarize_waste_storage <- function(data) {
  data |>
    group_by(year, statistical_region, type_of_waste) |>
    summarize(
      waste_stored_start_year = sum(waste_stored_start_year, na.rm = TRUE) / 1000,
      waste_stored_end_year = sum(waste_stored_end_year, na.rm = TRUE) / 1000,
      .groups = "drop"
    )
}

# Theme ----
# Base theme with rotated x-axis labels
my_theme <- theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Generation Data Analysis Process
Data Loading and Preparation
Data from 2018 to 2022 was loaded from separate CSV files for each year.
The 2018 data required special handling due to its different structure:
Two separate files were used: generation/waste_generation_18.csv and generation/waste_management_18.csv.
These files were joined based on common columns (region, company, and waste type).
Data Standardization
Column names were standardized across all years to ensure consistency.
A year column was added to the 2018 data to match the structure of other years.
Data Transformation
For 2018 Data:
The delivered_to column was transformed into separate columns for different types of waste transfer.
A total_waste_stored column was created by summing waste_stored_start_year and waste_generated.
For All Years:
The clear_waste_name function was applied to standardize waste type names.
Data Combination
Data from 2019 to 2022 was combined using bind_rows().
The processed 2018 data was then appended to this combined dataset.
Analysis and Visualization
Total waste generation by year was calculated and visualized.
Waste generation was analyzed by region and year.
Waste generation was analyzed by waste type and year.
Waste transferred for treatment and waste stored at the end of the year were analyzed separately.
Key Data Transformations
Grouping: Data was frequently grouped by year, region, or waste type to allow for aggregate analysis.
Sum and Summarize: Within groups, waste quantities were summed to get total amounts.
Unit Conversion: Waste amounts were often divided by 1000 to convert from kilograms to tons.
Visualization Techniques
Line plots were used to show trends over time.
Interactive plots were created using ggplotly() for more detailed exploration.
Rows: 59 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_generated_stored, temporarily_stored_start_year, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 60 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_generated_stored, temporarily_stored_start_year, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 64 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_generated_stored, temporarily_stored_start_year, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 59 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_generated, temporary_stored_start_year, waste_gen...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 61 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_generated, temporary_stored_start_year, waste_gen...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 66 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): waste_stored_start_year, waste_generated, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 79 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, type_of_waste, delivered_to
dbl (1): total_waste_given_away
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 63 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (4): total_waste_2017, waste_stored_start_year, total_waste_generated, w...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 86 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, type_of_waste, management
dbl (2): total_waste_given_away, year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 66 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (5): year, total_waste_generated, waste_stored_start_year, waste_generat...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 92 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, type_of_waste, management
dbl (2): year, total_waste_given_away
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 42 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, previous_stored_start_year, previous_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 38 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, previous_stored_start_year, previous_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 71 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 39 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 34 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 39 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 39 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give the 2021 and 2022 data the column names of the 2016 data.
# The rename is positional, so it relies on all three files sharing one column
# order.
names(waste.storage.2021.data) <- colnames(waste.storage.2016.data)
names(waste.storage.2022.data) <- colnames(waste.storage.2016.data)

combined_storage_data <- bind_rows(
  waste.storage.2016.data,
  waste.storage.2017.data,
  waste.storage.2018.data,
  waste.storage.2019.data,
  waste.storage.2020.data,
  waste.storage.2021.data,
  waste.storage.2022.data
)

# clear waste names
combined_storage_data <- combined_storage_data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))

# export the combined storage data to the data folder
write_csv(
  combined_storage_data,
  "Interface/Visualization/data/coll_storage_combined.csv"
)

# Reshape data for plotting
df_long <- melt(
  combined_storage_data,
  id.vars = c("statistical_region", "type_of_waste", "year"),
  measure.vars = c("waste_stored_start_year", "waste_stored_end_year"),
  variable.name = "storage_time",
  value.name = "total_storage"
)

# Sum over years and waste types first: dodged identity bars do not add up
# rows that share a region and storage time, they are drawn on top of each
# other, so plotting df_long directly would not show totals.
storage_by_region <- df_long |>
  group_by(statistical_region, storage_time) |>
  summarize(
    total_storage = sum(total_storage, na.rm = TRUE),
    .groups = "drop"
  )

# Grouped bar plot
t6 <- ggplot(
  storage_by_region,
  aes(x = statistical_region, y = total_storage, fill = storage_time)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Waste Storage at Start and End of the Year",
    x = "Statistical Region",
    y = "Total Stored Waste"
  ) +
  scale_fill_manual(
    values = c(
      "waste_stored_start_year" = "blue",
      "waste_stored_end_year" = "red"
    ),
    # Positional labels follow the factor levels created by melt()
    # (start first, end second).
    labels = c("Start of Year", "End of Year")
  ) +
  coord_flip() +
  theme_minimal()

ggplotly(t6, width = 1000, height = 500)
# Yearly totals of waste stored at the start and at the end of each year
# (NA values are ignored in both sums).
yearly_data <- summarise(
  group_by(combined_storage_data, year),
  total_start = sum(waste_stored_start_year, na.rm = TRUE),
  total_end = sum(waste_stored_end_year, na.rm = TRUE)
)

# One aesthetic mapping per series; the constant colour string becomes the
# legend entry.
start_of_year_aes <- aes(y = total_start, color = "Start of Year")
end_of_year_aes <- aes(y = total_end, color = "End of Year")

# Points and lines for both series over the years.
t7 <- ggplot(yearly_data, aes(x = year)) +
  geom_point(mapping = start_of_year_aes) +
  geom_point(mapping = end_of_year_aes) +
  geom_line(mapping = start_of_year_aes) +
  geom_line(mapping = end_of_year_aes) +
  labs(
    title = "Total Stored Waste Over Time",
    y = "Total Stored Waste",
    color = "Time of Year"
  ) +
  theme_minimal()

ggplotly(t7, width = 1000, height = 500)
`summarise()` has grouped output by 'type_of_waste'. You can override using the
`.groups` argument.
# Stored waste per waste type over time.
# Colour identifies the waste type; line type and point shape distinguish the
# start-of-year from the end-of-year series. Colour is deliberately not
# overridden inside the layers: a constant colour there would hide the waste
# type and join all types into a single line.
t8 <- ggplot(type_data, aes(x = year, color = type_of_waste)) +
  geom_point(aes(y = total_start, shape = "Start of Year")) +
  geom_point(aes(y = total_end, shape = "End of Year")) +
  geom_line(aes(y = total_start, linetype = "Start of Year")) +
  geom_line(aes(y = total_end, linetype = "End of Year")) +
  labs(
    title = "Total Stored Waste by Type Over Time",
    y = "Total Stored Waste",
    color = "Type of Waste",
    linetype = "Time of Year",
    shape = "Time of Year"
  ) +
  theme_minimal()

ggplotly(t8, width = 1000, height = 500)
# outlier because of reporting or because of effects of COVID or something completely else?
Waste Received
Data Loading:
Waste collection data from the years 2016 to 2023 is loaded into separate dataframes.
Data Preparation:
The column names for the 2019, 2020, and 2021 data are renamed to match the 2022 data, ensuring consistency across all datasets.
Data Combination:
The data from all years is combined into a single dataframe called combined_received_data using bind_rows().
Data Reshaping:
The data is reshaped into a long-form format using the melt() function, resulting in columns for:
statistical_region
type_of_waste
year
source (indicating the source of the waste)
total_collected (indicating the amount of waste collected)
Visualization 1 - Grouped Bar Plot:
A grouped bar plot (t8_grouped) is created to show the total waste collected from different sources, grouped by statistical region. This plot uses different colors to represent each waste source.
Visualization 2 - Stacked Bar Plot:
A stacked bar plot (t8_stacked) is created to display the total waste collected by source, with the total_waste_collected layer placed behind the other sources. The bars are stacked and the plot is flipped horizontally using coord_flip().
Visualization 3 - Line Plot:
A line plot (t9) is generated to show how the amount of waste collected from different sources has changed over time. The plot includes lines representing waste from:
Producers (No Record)
Producers (With Record)
Collectors (RS)
Processors (RS)
This provides a visual trend of waste collection across years.
Interactive Plots:
The ggplotly() function is used to make the grouped bar plot, stacked bar plot, and line plot interactive, allowing for more detailed exploration of the data.
Rows: 53 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_collected, waste_from_producers_no_record, waste_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_collected, waste_from_producers_no_record, waste_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 53 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_collected, waste_from_producers_no_record, waste_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 53 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, waste_received, waste_from_producers_no_record, waste_from_pr...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 52 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_collected, waste_received_from_producers_no_list,...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_collected_waste, waste_from_producers_no_record, waste_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 53 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_collected, waste_from_producers_no_record, waste_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 54 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_collected, total_waste_collected_including_wareho...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Give the 2019-2021 data the column names of the 2022 data.
# The rename is positional, so it relies on these files sharing the 2022
# column order.
names(waste.received.2019.data) <- colnames(waste.received.2022.data)
names(waste.received.2020.data) <- colnames(waste.received.2022.data)
names(waste.received.2021.data) <- colnames(waste.received.2022.data)

# for 2023 data, if there is a column that has warehouse in its name, then
# exclude that column
waste.received.2023.data <- waste.received.2023.data |>
  select(-contains("warehouse"))

# Combine data
combined_received_data <- bind_rows(
  waste.received.2016.data,
  waste.received.2017.data,
  waste.received.2018.data,
  waste.received.2019.data,
  waste.received.2020.data,
  waste.received.2021.data,
  waste.received.2022.data,
  waste.received.2023.data
)

# clear waste names
combined_received_data <- combined_received_data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))

# export the combined received data to the data folder
write_csv(
  combined_received_data,
  "Interface/Visualization/data/coll_received_combined.csv"
)

# Reshape data for plotting
df_long_received <- melt(
  combined_received_data,
  id.vars = c("statistical_region", "type_of_waste", "year"),
  measure.vars = c(
    "waste_from_producers_no_record",
    "waste_from_producers_with_record",
    "waste_from_collectors_RS",
    "waste_from_processors_RS"
  ),
  variable.name = "source",
  value.name = "total_collected"
)

# Calculate total collected waste (excluding total_waste_collected)
total_collected_per_region <- df_long_received |>
  group_by(statistical_region) |>
  summarize(total_collected = sum(total_collected, na.rm = TRUE)) |>
  arrange(desc(total_collected))

# Reorder the regions in descending order of total_collected
df_long_received$statistical_region <- factor(
  df_long_received$statistical_region,
  levels = total_collected_per_region$statistical_region
)

# Colorblind-friendly palette (Okabe-Ito)
color_palette <- c(
  "waste_from_producers_no_record" = "#E69F00", # Orange
  "waste_from_producers_with_record" = "#56B4E9", # Sky blue
  "waste_from_collectors_RS" = "#009E73", # Green
  "waste_from_processors_RS" = "#F0E442" # Yellow
)

# Legend labels shared by all three plots; positional, in the order of the
# `source` factor levels created by melt() above.
source_labels <- c(
  "From Producers (No Record)",
  "From Producers (With Record)",
  "From Collectors (RS)",
  "From Processors (RS)"
)

# Totals per region and source for the dodged plots. Unlike stacked bars,
# dodged identity bars do not add up rows that share a region and source (they
# are drawn on top of each other), so the sums are computed explicitly. The
# region factor ordering is preserved by group_by().
received_by_region_source <- df_long_received |>
  group_by(statistical_region, source) |>
  summarize(
    total_collected = sum(total_collected, na.rm = TRUE),
    .groups = "drop"
  )

# Stacked bar plot
t8_stacked <- ggplot(
  df_long_received,
  aes(x = statistical_region, y = total_collected, fill = source)
) +
  geom_col() +
  labs(
    title = "Waste Received by Source",
    x = "Statistical Region",
    y = "Total Collected Waste"
  ) +
  scale_fill_manual(values = color_palette, labels = source_labels) +
  # coord_flip() +
  my_theme

# Faceted bar plot by source
t8_faceted <- ggplot(
  received_by_region_source,
  aes(x = statistical_region, y = total_collected, fill = source)
) +
  geom_col(position = "dodge") +
  # Create a facet for each source
  facet_wrap(~ source, ncol = 1, scales = "free_y") +
  labs(
    title = "Waste Received by Source",
    x = "Statistical Region",
    y = "Total Collected Waste"
  ) +
  scale_fill_manual(values = color_palette, labels = source_labels) +
  my_theme

# Grouped bar plot
t8_grouped <- ggplot(
  received_by_region_source,
  aes(x = statistical_region, y = total_collected, fill = source)
) +
  # Group the bars by source
  geom_col(position = "dodge") +
  labs(
    title = "Waste Received by Source",
    x = "Statistical Region",
    y = "Total Collected Waste"
  ) +
  scale_fill_manual(values = color_palette, labels = source_labels) +
  my_theme

ggplotly(t8_stacked, width = 1000, height = 1000)
ggplotly(t8_faceted, width = 1000, height = 1000)
ggplotly(t8_grouped, width = 1000, height = 1000)
# Year plot
# Yearly totals of received waste: the reported overall total plus one total
# per source (NA values are ignored).
yearly_data_received <- summarise(
  group_by(combined_received_data, year),
  total_collected = sum(total_waste_collected, na.rm = TRUE),
  from_producers_no_record = sum(waste_from_producers_no_record, na.rm = TRUE),
  from_producers_with_record = sum(waste_from_producers_with_record, na.rm = TRUE),
  from_collectors_RS = sum(waste_from_collectors_RS, na.rm = TRUE),
  from_processors_RS = sum(waste_from_processors_RS, na.rm = TRUE)
)

# One aesthetic mapping per source; the constant colour string becomes the
# legend entry.
no_record_aes <- aes(
  y = from_producers_no_record,
  color = "From Producers (No Record)"
)
with_record_aes <- aes(
  y = from_producers_with_record,
  color = "From Producers (With Record)"
)
collectors_aes <- aes(y = from_collectors_RS, color = "From Collectors (RS)")
processors_aes <- aes(y = from_processors_RS, color = "From Processors (RS)")

# Every year present in the data gets its own axis break.
year_breaks <- unique(yearly_data_received$year)

# Line and point plot with all years shown
t9 <- ggplot(yearly_data_received, aes(x = year)) +
  geom_point(mapping = no_record_aes) +
  geom_line(mapping = no_record_aes) +
  geom_point(mapping = with_record_aes) +
  geom_line(mapping = with_record_aes) +
  geom_point(mapping = collectors_aes) +
  geom_line(mapping = collectors_aes) +
  geom_point(mapping = processors_aes) +
  geom_line(mapping = processors_aes) +
  labs(
    title = "Waste Collected by Source Over Time",
    y = "Waste Collected",
    color = "Source"
  ) +
  my_theme +
  scale_x_continuous(breaks = year_breaks)

ggplotly(t9, width = 1000, height = 500)
Municipal Waste Collected
Problem Statement:
Challenge: The code is attempting to create location data for companies based on 2018 data. The 2018 dataset contains address information within the name_of_company column. However, it is not guaranteed that the address from 2018 is the same for the company in 2019.
Assumption: For this analysis, the code assumes that the address for a company in 2019 is the same as in 2018.
Data Processing Steps:
Data Loading:
The municipal waste collection data from 2018 to 2023 is loaded into separate dataframes.
Data Preparation:
The column names in the 2018 data are aligned with those in the 2019 data for consistency.
The waste type names in the datasets for all years are cleaned using a function clear_waste_name().
Municipality of Origin Determination:
For the 2018 data, the company name is separated from the full address, and the municipality name is extracted from the address. The extracted address and municipality name are added as new columns to the dataframe.
Address and Municipality Matching:
The 2018 municipality and address data is joined with the 2019 data based on the name_of_company field.
For cases where municipality or address data is missing in 2019, “UNDEFINED” is used as a placeholder.
Statistical Region Mapping:
The municipality_region_mapping dataset is used to map each municipality to its corresponding statistical region for both 2018 and 2019 data.
Data Correction:
The names of certain municipalities (e.g., “IZOLA - ISOLA”, “KOPER - CAPODISTRIA”) are standardized in both the 2018 and 2019 datasets.
The address, municipality name, and statistical region for specific companies (e.g., “KOMUNALA RADGONA, javno podjetje d.o.o.”) are manually corrected in the 2019 data.
Data Aggregation:
The datasets for each year are grouped by year, statistical_region, name_of_municipality, and type_of_waste, and the total waste collected is summarized.
Data Combination:
The processed datasets from 2018 to 2022 are combined into a single dataframe, combined_municipal_data.
Data Reshaping:
The combined data is reshaped into a long format suitable for plotting using melt(), with columns for year, statistical_region, name_of_municipality, type_of_waste, and total_collected.
Visualizations:
Total Waste Collected by Municipality and Region (2018-2022):
A line plot (t10) is created to show the total waste collected by municipality and region from 2018 to 2022, with separate lines for each type of waste. The plot is faceted by statistical region.
Top 5 Municipalities by Total Waste Collected (2018-2022):
The top 5 municipalities based on total waste collected are identified.
A line plot (t11) is generated to visualize the total waste collected over the years for these top 5 municipalities, with separate lines for each municipality and waste type.
Rows: 131 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): name_of_company, type_of_waste
dbl (4): year, total_waste_collected, collected_from_households, collected_f...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 134 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): name_of_company, type_of_waste
dbl (4): year, total_waste_collected, municipal_waste_collected, waste_colle...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 470 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): type_of_waste, name_of_municipality, statistical_region
dbl (2): year, municipal_waste_collected_by_municipality
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 468 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): name_of_municipality, type_of_waste, statistical_region
dbl (2): year, waste_by_municipality
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 494 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, type_of_waste, name_of_municipality
dbl (2): year, waste_by_municipality
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 604 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): statistical_region, type_of_waste, name_of_municipality, waste_coll...
dbl (2): year, waste_by_municipality
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# ---- Municipal waste by company: harmonise 2018/2019 and attach addresses ----

# Keep the 2019 columns in a fixed order.
waste.management.2019.data <- waste.management.2019.data |>
  select(
    year,
    name_of_company,
    type_of_waste,
    total_waste_collected,
    municipal_waste_collected,
    waste_collected_from_activities
  )

# Positional rename: the 2018 table takes over the 2019 column names.
# NOTE(review): this assumes the 2018 columns are in the same order as the
# 2019 selection above -- confirm against the 2018 CSV.
colnames(waste.management.2018.data) <- colnames(waste.management.2019.data)

# Standardise the waste-type labels of every year with clear_waste_name().
waste.management.2018.data <- waste.management.2018.data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))
waste.management.2019.data <- waste.management.2019.data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))
waste.management.2020.data <- waste.management.2020.data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))
waste.management.2021.data <- waste.management.2021.data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))
waste.management.2022.data <- waste.management.2022.data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))
waste.management.2023.data <- waste.management.2023.data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))

# define municipality of origin
# The 2018 company field is split at the first "; " into company name and
# address. The municipality is the text that follows the first run of digits
# that is directly followed by whitespace (presumably the postal code --
# TODO confirm for addresses whose house number is followed by a space).
waste.management.2018.data <- waste.management.2018.data |>
  separate(
    name_of_company,
    into = c("company_name", "full_address"),
    sep = "; ",
    extra = "merge"
  ) |>
  mutate(address = full_address) |>
  mutate(
    name_of_municipality = str_extract(full_address, "\\d+\\s(.+)$") |>
      str_replace("^\\d+\\s", "")
  ) |>
  select(-full_address)

# Add address and municipality to the 2019 rows by company name.
# The 2018 table has one row per company and waste type, so the lookup is
# reduced to distinct company/address/municipality rows first; joining the raw
# table multiplies the 2019 rows and triggers dplyr's many-to-many warning.
waste.management.2019.data <- waste.management.2019.data |>
  left_join(
    waste.management.2018.data |>
      distinct(company_name, address, name_of_municipality),
    by = c("name_of_company" = "company_name")
  )
Warning in left_join(waste.management.2019.data, select(waste.management.2018.data, : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 3 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
# ---- Municipal waste by company: regions, name fixes, 2018 totals ----

# Map the bilingual municipality labels to their short Slovenian form.
# Values that are not listed (including NA) are returned unchanged.
normalize_bilingual_municipality <- function(municipality) {
  short_names <- c(
    "IZOLA - ISOLA" = "IZOLA",
    "KOPER - CAPODISTRIA" = "KOPER",
    "PIRAN - PIRANO" = "PIRAN",
    "LENDAVA - LENDVA" = "LENDAVA"
  )
  is_bilingual <- municipality %in% names(short_names)
  municipality[is_bilingual] <- unname(short_names[municipality[is_bilingual]])
  municipality
}

# Drop exact duplicate rows produced by the preceding join.
waste.management.2019.data <- waste.management.2019.data |>
  distinct()

# if NA, then undefined
waste.management.2019.data <- waste.management.2019.data |>
  mutate(
    name_of_municipality = ifelse(
      is.na(name_of_municipality), "UNDEFINED", name_of_municipality
    ),
    address = ifelse(is.na(address), "UNDEFINED", address)
  )

# add column for statistical regions based on name_of_municipality
# (the join runs before the bilingual names are shortened, as in the original)
waste.management.2018.data <- waste.management.2018.data |>
  left_join(municipality_region_mapping, by = "name_of_municipality")
waste.management.2019.data <- waste.management.2019.data |>
  left_join(municipality_region_mapping, by = "name_of_municipality")

# Shorten the bilingual municipality names in both years.
waste.management.2018.data <- waste.management.2018.data |>
  mutate(
    name_of_municipality = normalize_bilingual_municipality(name_of_municipality)
  )
waste.management.2019.data <- waste.management.2019.data |>
  mutate(
    name_of_municipality = normalize_bilingual_municipality(name_of_municipality)
  )

# change the address, name_of_municipality and statistical region for
# KOMUNALA RADGONA, javno podjetje d.o.o.
komunala_radgona <- "KOMUNALA RADGONA, javno podjetje d.o.o."
waste.management.2019.data <- waste.management.2019.data |>
  mutate(
    address = ifelse(
      name_of_company == komunala_radgona,
      "PARTIZANSKA CESTA 13, 9250 GORNJA RADGONA",
      address
    ),
    name_of_municipality = ifelse(
      name_of_company == komunala_radgona,
      "GORNJA RADGONA",
      name_of_municipality
    ),
    statistical_region = ifelse(
      name_of_company == komunala_radgona,
      "POMURSKA",
      statistical_region
    )
  )

# change the address, name_of_municipality and statistical region for
# JAVNO PODJETJE VODOVOD KANALIZACIJA SNAGA d.o.o.
voka_snaga <- "JAVNO PODJETJE VODOVOD KANALIZACIJA SNAGA d.o.o."
waste.management.2019.data <- waste.management.2019.data |>
  mutate(
    address = ifelse(
      name_of_company == voka_snaga,
      "VODOVODNA CESTA 90, 1001 LJUBLJANA",
      address
    ),
    name_of_municipality = ifelse(
      name_of_company == voka_snaga,
      "LJUBLJANA",
      name_of_municipality
    ),
    statistical_region = ifelse(
      name_of_company == voka_snaga,
      "OSREDNJESLOVENSKA",
      statistical_region
    )
  )

# remove company data as it is not needed for now
waste.management.2018.data <- waste.management.2018.data |>
  select(-company_name, -address)
waste.management.2019.data <- waste.management.2019.data |>
  select(-name_of_company, -address)

# focus only on total waste collected for all years
# `.groups = "drop"` returns an ungrouped table without the regrouping message.
waste.management.2018.data <- waste.management.2018.data |>
  group_by(year, statistical_region, name_of_municipality, type_of_waste) |>
  summarize(
    total_waste_collected = sum(total_waste_collected, na.rm = TRUE),
    .groups = "drop"
  )
`summarise()` has grouped output by 'year', 'statistical_region',
'name_of_municipality'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'year', 'statistical_region',
'name_of_municipality'. You can override using the `.groups` argument.
# ---- Combine the yearly municipal tables, export, and plot (t10) ----

# Stack the yearly tables (2018-2023) into one data set.
combined_municipal_data <- bind_rows(
  waste.management.2018.data,
  waste.management.2019.data,
  waste.management.2020.data,
  waste.management.2021.data,
  waste.management.2022.data,
  waste.management.2023.data
)

# clear waste name
combined_municipal_data <- combined_municipal_data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))

# export the combined municipal data to the data folder
write_csv(
  combined_municipal_data,
  "Interface/Visualization/data/coll_municipal_combined.csv"
)

# Reshape data for plotting: one row per year/region/municipality/waste type,
# with the measured quantity in `total_collected`.
df_long_municipal <- melt(
  combined_municipal_data,
  id.vars = c(
    "year",
    "statistical_region",
    "name_of_municipality",
    "type_of_waste"
  ),
  measure.vars = c("total_waste_collected"),
  variable.name = "source",
  value.name = "total_collected"
)

# One line per municipality and waste type inside each region facet.
# Grouping by waste type alone would join the points of different
# municipalities into a single zig-zag line, so the group is the
# municipality x waste-type combination.
t10 <- ggplot(
  df_long_municipal,
  aes(
    x = year,
    y = total_collected,
    color = name_of_municipality,
    group = interaction(name_of_municipality, type_of_waste)
  )
) +
  geom_line() +
  geom_point() +
  facet_wrap(~statistical_region, scales = "free_y") +
  labs(
    title = "Total Waste Collected by Municipality and Region (2018-2023)",
    x = "Year",
    y = "Total Waste Collected (in tons)",
    color = "Municipality"
  ) +
  my_theme

ggplotly(t10, width = 1000, height = 1000)
# ---- Top 5 municipalities by total collected waste (t11) ----

# Calculate total waste for each municipality and keep the five largest.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties.
top_municipalities <- df_long_municipal |>
  group_by(name_of_municipality) |>
  summarise(total_waste = sum(total_collected, na.rm = TRUE)) |>
  slice_max(total_waste, n = 5) |>
  pull(name_of_municipality)

# Filter the original dataset
df_top_municipal <- df_long_municipal |>
  filter(name_of_municipality %in% top_municipalities)

# Plot: one line per municipality, one facet per waste type.
t11 <- ggplot(
  df_top_municipal,
  aes(
    x = year,
    y = total_collected,
    color = name_of_municipality,
    group = name_of_municipality
  )
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Total Waste Collected by Top 5 Municipalities (2018-2023)",
    x = "Year",
    y = "Total Waste Collected (in tons)",
    color = "Municipality"
  ) +
  facet_wrap(~type_of_waste) +
  my_theme +
  theme(legend.position = "bottom")

ggplotly(t11, width = 1000, height = 1000)
Municipal Waste Collected by Municipality of Origin
This section of the analysis focuses on the municipal waste collected by the municipality of origin from 2018 to 2023. The steps involved in the code are summarized as follows:
Data Loading:
Municipal waste collection data is read from CSV files for the years 2018 to 2023.
Data Summarization:
For each year, the data is grouped by year, statistical_region, name_of_municipality, and type_of_waste.
The total waste collected is summarized using the sum() function.
Column Selection and Renaming:
Relevant columns are selected and renamed to ensure consistency across different years.
Data Cleaning:
The type_of_waste column is cleaned using a custom function (clear_waste_name).
Data Combination:
Data from all years is combined into a single dataset using bind_rows().
Data Reshaping:
The combined data is reshaped for plotting using the melt() function.
Visualization:
A stacked bar plot is created to visualize the total waste collected by municipality of origin over the years.
The plot displays the amount of waste collected in tons, with different municipalities distinguished by color.
Rows: 580 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): name_of_company, statistical_region, name_of_municipality, type_of_...
dbl (2): year, total_waste_collected
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 413 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, name_of_municipality, type_of_waste
dbl (2): year, total_waste_collected
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 470 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): type_of_waste, name_of_municipality, statistical_region
dbl (2): year, municipal_waste_collected_by_municipality
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 468 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): name_of_municipality, type_of_waste, statistical_region
dbl (2): year, waste_by_municipality
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 494 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, type_of_waste, name_of_municipality
dbl (2): year, waste_by_municipality
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 604 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): statistical_region, type_of_waste, name_of_municipality, waste_coll...
dbl (2): year, waste_by_municipality
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# summarize by year, statistical region, name of municipality and type of waste
# `.groups = "drop"` returns an ungrouped table without the regrouping message.
waste.collected.2018.data <- waste.collected.2018.data |>
  group_by(year, statistical_region, type_of_waste, name_of_municipality) |>
  summarize(
    waste_by_municipality = sum(total_waste_collected, na.rm = TRUE),
    .groups = "drop"
  )
`summarise()` has grouped output by 'year', 'statistical_region',
'type_of_waste'. You can override using the `.groups` argument.
`summarise()` has grouped output by 'year', 'statistical_region',
'type_of_waste'. You can override using the `.groups` argument.
# ---- Waste by municipality of origin: clean, combine, export, plot (t12) ----

# Standardise the waste-type labels of each yearly table.
waste.collected.2018.data <- mutate(
  waste.collected.2018.data,
  type_of_waste = clear_waste_name(type_of_waste)
)
waste.collected.2019.data <- mutate(
  waste.collected.2019.data,
  type_of_waste = clear_waste_name(type_of_waste)
)
waste.collected.2020.data <- mutate(
  waste.collected.2020.data,
  type_of_waste = clear_waste_name(type_of_waste)
)
waste.collected.2021.data <- mutate(
  waste.collected.2021.data,
  type_of_waste = clear_waste_name(type_of_waste)
)
waste.collected.2022.data <- mutate(
  waste.collected.2022.data,
  type_of_waste = clear_waste_name(type_of_waste)
)
waste.collected.2023.data <- mutate(
  waste.collected.2023.data,
  type_of_waste = clear_waste_name(type_of_waste)
)

# Stack all years into one table.
combined_collected_data <- bind_rows(
  waste.collected.2018.data,
  waste.collected.2019.data,
  waste.collected.2020.data,
  waste.collected.2021.data,
  waste.collected.2022.data,
  waste.collected.2023.data
)

# Write the combined table to the visualisation data folder.
write_csv(
  combined_collected_data,
  "Interface/Visualization/data/coll_municipal_collected_combined.csv"
)

# Long format for plotting; the quantity ends up in `total_collected`.
collected_id_columns <- c(
  "year",
  "statistical_region",
  "name_of_municipality",
  "type_of_waste"
)
df_long_collected <- melt(
  combined_collected_data,
  id.vars = collected_id_columns,
  measure.vars = c("waste_by_municipality"),
  variable.name = "source",
  value.name = "total_collected"
)

# Bar plot of collected waste per year, filled by municipality.
# NOTE(review): the section text calls this a stacked bar plot, but the bars
# are drawn with position = "dodge" and grouped by statistical_region --
# confirm which layout is intended.
collected_mapping <- aes(
  x = year,
  y = total_collected,
  fill = name_of_municipality,
  group = statistical_region
)
t12 <- ggplot(df_long_collected, collected_mapping) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Total Waste Collected by Municipality of Origin",
    x = "Year",
    y = "Total Waste Collected (in tons)",
    fill = "Municipality"
  ) +
  my_theme

ggplotly(t12, width = 1000, height = 1000)
Municipal Waste Management
This section analyzes municipal waste management from 2016 to 2023, focusing on various sources of waste management across different regions and waste types. The steps involved in the code are summarized below:
Data Loading:
Municipal waste management data is loaded from CSV files for the years 2016 to 2023.
Data Preparation:
Column names are standardized across different years to ensure consistency.
Missing columns for certain years (like waste_sent_to_non_EU) are added and initialized to zero.
Data Combination:
Data from all years is combined into a single dataset using bind_rows().
Data Filtering:
Rows with statistical_region labeled as “NEOPREDELJENO” are filtered out.
Data Reshaping:
The combined data is reshaped for plotting using the melt() function, focusing on variables related to waste management.
Visualizations:
Stacked Bar Plot: Visualizes the total waste given away by different sources for each statistical region using ggplot and ggplotly.
Bubble Plot: Shows the total waste given away by year, with bubble sizes representing the amount of waste.
Stacked Bar Plot by Year: Visualizes total waste by region and type across different years, with bars stacked by waste type.
Time Trend Line Plot: Illustrates waste management trends over time by statistical region.
Time Trend by Waste Type: Uses area plots to show waste composition over time by region.
Heatmap: Displays waste management intensity across regions and years.
Sparklines: Normalized line plots showing waste management trends by region in a compact format.
Rows: 51 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (5): year, total_waste_given_away, waste_given_to_processing_operator_RS...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_given_away, waste_given_to_collector_RS, waste_gi...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 52 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (5): year, total_waste_given_away, delivered_to_collector, delivered_to_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (5): year, total_waste_delivered, waste_delivered_to_other_collector_RS,...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 52 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_given_away, waste_handed_to_collectors_RS, waste_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_given_away, waste_handed_to_collectors_RS, waste_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_given_away, waste_handed_to_collectors_RS, waste_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 54 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_given_away, waste_handed_to_collectors_RS, waste_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# ---- Waste management: harmonise columns, combine, export, plot (t13) ----

# The 2022 table supplies the reference column names for every other year.
# NOTE(review): the renaming below is positional, so it assumes each yearly
# table has its columns in the same order as 2022 -- confirm per CSV.
reference_columns <- colnames(waste.management.2022.data)

# 2016, 2018 and 2019 are one column short: append a zero-filled
# waste_sent_to_non_EU column before renaming.
waste.management.2016.data <- mutate(waste.management.2016.data, waste_sent_to_non_EU = 0)
waste.management.2018.data <- mutate(waste.management.2018.data, waste_sent_to_non_EU = 0)
waste.management.2019.data <- mutate(waste.management.2019.data, waste_sent_to_non_EU = 0)

# Apply the reference names to every year except 2022 and 2023.
names(waste.management.2016.data) <- reference_columns
names(waste.management.2017.data) <- reference_columns
names(waste.management.2018.data) <- reference_columns
names(waste.management.2019.data) <- reference_columns
names(waste.management.2020.data) <- reference_columns
names(waste.management.2021.data) <- reference_columns

# Stack all years.
combined_management_data <- bind_rows(
  waste.management.2016.data,
  waste.management.2017.data,
  waste.management.2018.data,
  waste.management.2019.data,
  waste.management.2020.data,
  waste.management.2021.data,
  waste.management.2022.data,
  waste.management.2023.data
)

# Standardise waste-type labels.
combined_management_data <- mutate(
  combined_management_data,
  type_of_waste = clear_waste_name(type_of_waste)
)

# Drop the rows whose statistical_region is "NEOPREDELJENO".
combined_management_data <- filter(
  combined_management_data,
  statistical_region != "NEOPREDELJENO"
)

# Export the combined table.
write_csv(
  combined_management_data,
  "Interface/Visualization/data/coll_management_combined.csv"
)

# we only have 1 instance of waste_sent_to_non_EU
# combined_management_data <- combined_management_data |> select(-waste_sent_to_non_EU)

# Long format: one row per region / waste type / year / destination.
management_destinations <- c(
  "waste_handed_to_collectors_RS",
  "waste_delivered_to_operators_RS",
  "waste_sent_to_EU",
  "waste_sent_to_non_EU"
)
df_long_management <- melt(
  combined_management_data,
  id.vars = c("statistical_region", "type_of_waste", "year"),
  measure.vars = management_destinations,
  variable.name = "source",
  value.name = "total_waste_given_away"
)

# Horizontal stacked bars: one bar per region, stacked by destination.
management_mapping <- aes(
  x = statistical_region,
  y = total_waste_given_away,
  fill = source
)
t13 <- ggplot(df_long_management, management_mapping) +
  geom_bar(stat = "identity") +
  labs(
    title = "Waste Management by Source",
    x = "Statistical Region",
    y = "Total Waste Given Away",
    fill = "Source"
  ) +
  coord_flip() +
  my_theme

ggplotly(t13, width = 1000, height = 500)
# Bubble plot over the years: the point size and the y position both encode
# total_waste_given_away; colour encodes the waste type.
bubble_mapping <- aes(
  x = year,
  y = total_waste_given_away,
  size = total_waste_given_away,
  color = type_of_waste,
  group = statistical_region
)
bubble_labels <- labs(
  title = "Total Waste Given Away by Year",
  x = "Year",
  y = "Total Waste Given Away",
  color = "Type of Waste"
)

t14 <- ggplot(combined_management_data, bubble_mapping) +
  geom_point() +
  bubble_labels +
  my_theme

ggplotly(t14, width = 1000, height = 500)
Rows: 30 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 24 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 15 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 29 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 31 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 31 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (3): year, waste_stored_start_year, waste_stored_end_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 10 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, quantity_stored_start_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 55 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): statistical_region, type_of_waste_inlet, type_of_waste_outlet, subs...
dbl (2): year, stored_quantity_start_year
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
structure(list(year = c(2023, 2023, 2023, 2023, 2023, 2023),
statistical_region = c("GORENJSKA", "GORENJSKA", "GORENJSKA",
"GORENJSKA", "GORENJSKA", "GORENJSKA"), type_of_waste_inlet = c("Bark and Cork Waste",
"Bulky Waste", "Paper/Cardboard Packaging", "Sawdust/Chips/Wood, not specified in 03 01 04",
"Sawdust/Chips/Wood, not specified in 03 01 04", "Sawdust/Chips/Wood, not specified in 03 01 04"
), type_of_waste_outlet = c("undefined", "undefined", "Paper/Cardboard Packaging",
"Wood, not specified in 19 12 06", "undefined", "undefined"
), substance_or_object = c("99 – drugo", "7 – les", "undefined",
"undefined", "7 – les", "99 – drugo"), stored_quantity_start_year = c(0,
23.423, 4.565, 8.781, 0, 0)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
# The 2023 flow runs from waste inlet type, to waste outlet type, to the
# resulting substance/object (product).
flow_2023 <- waste.storage.2023.data

# Build the node labels of one stage: unique values plus a stage suffix.
tag_nodes <- function(values, suffix) {
  paste0(unique(values), suffix)
}

# Separate node sets per stage, so the same waste name can appear as both an
# inlet and an outlet node.
inlet_nodes <- tag_nodes(flow_2023$type_of_waste_inlet, " (Inlet)")
outlet_nodes <- tag_nodes(flow_2023$type_of_waste_outlet, " (Outlet)")
product_nodes <- tag_nodes(flow_2023$substance_or_object, " (Product)")

print(outlet_nodes)
[1] "undefined (Outlet)"
[2] "Paper/Cardboard Packaging (Outlet)"
[3] "Wood, not specified in 19 12 06 (Outlet)"
[4] "Sawdust/Chips/Wood, not specified in 03 01 04 (Outlet)"
[5] "Wooden packaging (Outlet)"
[6] "Other Wastes (including mixtures of materials) from Mechanical Treatment, not specified in 19 12 11 (Outlet)"
[7] "Wood, not specified in 20 01 37 (Outlet)"
# Combine all nodes in the correct order: inlets, then outlets, then products.
nodes <- c(inlet_nodes, outlet_nodes, product_nodes)

# Links of the first stage (inlet -> outlet): the stored quantity at the start
# of the year, summed per source/target pair.
# `.groups = "drop"` returns an ungrouped table without the regrouping message.
links_stage1 <- flow_2023 |>
  mutate(
    source = paste0(type_of_waste_inlet, " (Inlet)"),
    target = paste0(type_of_waste_outlet, " (Outlet)")
  ) |>
  group_by(source, target) |>
  summarise(value = sum(stored_quantity_start_year), .groups = "drop")
`summarise()` has grouped output by 'source'. You can override using the
`.groups` argument.
# Start-of-year and end-of-year storage per region and waste type, one facet
# per combination. The rows of `matching_instances` are overlaid as red points
# at their end-of-year value.
storage_labels <- labs(
  title = "Waste Storage Data (2018-2022)",
  subtitle = "Red points indicate matching instances where End Year matches Start Year of the next year",
  x = "Year",
  y = "Waste Stored",
  color = "Legend"
)

t20 <- ggplot(
  combined_storage_data,
  aes(x = year, group = interaction(statistical_region, type_of_waste))
) +
  geom_line(aes(y = waste_stored_start_year, color = "Start Year")) +
  geom_line(aes(y = waste_stored_end_year, color = "End Year")) +
  geom_point(
    data = matching_instances,
    aes(y = waste_stored_end_year),
    color = "red",
    size = 3
  ) +
  storage_labels +
  facet_wrap(~ statistical_region + type_of_waste, scales = "free_y") +
  my_theme

ggplotly(t20, width = 1000, height = 1000)
# ---- Year-over-year storage changes and the t21 plot ----

# Ensure every combination of statistical_region, type_of_waste, and year
# exists; the storage columns of the added rows are filled with 0.
complete_data <- combined_storage_data |>
  complete(
    year,
    nesting(statistical_region, type_of_waste),
    fill = list(
      waste_stored_start_year = 0,
      waste_stored_end_year = 0
    )
  )

# Add the previous year's end-of-year quantity within each region/waste type.
complete_data <- complete_data |>
  arrange(statistical_region, type_of_waste, year) |>
  group_by(statistical_region, type_of_waste) |>
  mutate(previous_end_year = lag(waste_stored_end_year, 1)) |>
  ungroup()

# Create flags for "Outside Period" (no neighbouring year available).
# NOTE(review): next_year_start is not created in this block; it is expected
# to already be a column of combined_storage_data -- confirm.
complete_data <- complete_data |>
  mutate(
    outside_period_next_start = is.na(next_year_start),
    outside_period_prev_end = is.na(previous_end_year)
  )

# Replace NA with 0 for plotting purposes.
complete_data <- complete_data |>
  mutate(
    next_year_start = ifelse(is.na(next_year_start), 0, next_year_start),
    previous_end_year = ifelse(is.na(previous_end_year), 0, previous_end_year)
  )

# Gap between this year's start and the previous year's end.
complete_data <- complete_data |>
  mutate(difference = waste_stored_start_year - previous_end_year)

# Define a threshold for significant change (10% of the previous year's end).
threshold <- 0.1

complete_data <- complete_data |>
  mutate(
    significant_change = case_when(
      difference > (previous_end_year * threshold) ~ "Increase",
      difference < -(previous_end_year * threshold) ~ "Decrease",
      TRUE ~ "No Significant Change"
    )
  )

# Single region / waste type used for the detailed plots.
filter_by_waste <- complete_data |>
  filter(
    type_of_waste == "Paper/Cardboard Packaging" &
      statistical_region == "OSREDNJESLOVENSKA"
  )

# Define Unicode arrows
arrow_up <- "\u2191" # Up arrow
arrow_down <- "\u2193" # Down arrow
equal <- "\u003d" # Equal sign

# Start/end storage lines with change markers on the start-of-year points.
# * `size = 5` is set outside aes(): inside aes() it is treated as a mapped
#   variable (scaled, with its own legend) instead of a fixed point size.
# * The manual scales are keyed by the labels that significant_change
#   actually contains ("No Significant Change"), otherwise those points get
#   no shape/colour and are dropped.
# * `text` is not a ggplot2 aesthetic; it is only read by ggplotly() for the
#   tooltip, hence the "unknown aesthetics" warning.
t21 <- ggplot(filter_by_waste, aes(x = year, group = statistical_region)) +
  geom_line(
    aes(y = waste_stored_start_year, color = "Start Year", linetype = "Start Year")
  ) +
  geom_line(
    aes(y = waste_stored_end_year, color = "End Year", linetype = "End Year")
  ) +
  geom_point(
    aes(
      y = waste_stored_start_year,
      shape = significant_change,
      color = significant_change,
      text = paste(
        "Year:", year,
        "<br>Waste at start:", waste_stored_start_year,
        "<br>Change:", significant_change
      )
    ),
    size = 5
  ) +
  geom_point(
    aes(
      y = waste_stored_end_year,
      shape = "End Year",
      color = "End Year",
      text = paste(
        "Year:", year,
        "<br>Waste at end:", waste_stored_end_year
      )
    ),
    size = 5
  ) +
  scale_shape_manual(
    values = c(
      "Increase" = 24,
      "Decrease" = 25,
      "No Significant Change" = 21,
      "End Year" = 21
    ),
    name = "Change in Waste"
  ) +
  scale_color_manual(
    values = c(
      "Start Year" = "blue",
      "End Year" = "red",
      "Increase" = "#19a90b",
      "Decrease" = "#a90b0b",
      "No Significant Change" = "grey"
    ),
    name = "Data Type"
  ) +
  scale_linetype_manual(
    values = c("Start Year" = "solid", "End Year" = "solid"),
    name = "Year Type"
  ) +
  labs(
    x = "Year",
    y = "Waste Stored (in tons)"
  ) +
  facet_wrap(~statistical_region, scales = "free_y") +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.box = "vertical",
    legend.margin = margin(t = 10, r = 10, b = 10, l = 10),
    legend.spacing.y = unit(0.5, "cm")
  )
Warning in geom_point(aes(y = waste_stored_start_year, shape =
significant_change, : Ignoring unknown aesthetics: text
Warning in geom_point(aes(y = waste_stored_end_year, shape = "End Year", :
Ignoring unknown aesthetics: text
# Interactive version of t21; the tooltip shows only the `text` aesthetic.
t22 <- ggplotly(t21, width = 1000, height = 1000, tooltip = "text")

# Title with an HTML subtitle, built once and passed to layout().
t22_title <- paste0(
  'Paper/Cardboard Packaging Waste Storage Data',
  '<br>',
  '<sup>',
  'Triangles indicate significant increases or decreases in waste compared to previous year end',
  '</sup>'
)

# Move the legend to the right of the plot area and set title and margins.
t22 <- layout(
  t22,
  legend = list(x = 1.02, y = 0.5),
  title = list(text = t22_title, x = 0.01),
  margin = list(l = 50, r = 50, b = 100, t = 80, pad = 4)
)

t22
# ---- Waterfall chart: year-end stock vs. next year's opening stock ----

# Prepare data for waterfall chart.
# Each input year yields two rows: "<year> End" carrying the end-of-year
# amount, and "<year + 1> Start" carrying the difference between next year's
# start and this year's end. Rows without a following year are dropped.
# NOTE(review): `as.numeric(levels(year))[year]` only works when `year` is a
# factor -- confirm it is converted upstream.
waterfall_data <- filter_by_waste |>
  arrange(year) |>
  mutate(
    end_year = paste0(as.numeric(levels(year))[year], " End"),
    start_next_year = paste0(as.numeric(levels(year))[year] + 1, " Start"),
    end_amount = waste_stored_end_year,
    start_amount = lead(waste_stored_start_year),
    difference = lead(waste_stored_start_year) - waste_stored_end_year
  ) |>
  select(end_year, start_next_year, end_amount, start_amount, difference) |>
  tidyr::pivot_longer(
    cols = c(end_year, start_next_year),
    names_to = "type",
    values_to = "year"
  ) |>
  mutate(
    amount = ifelse(type == "end_year", end_amount, difference),
    cumulative = cumsum(amount),
    color_category = case_when(
      type == "end_year" ~ "End Year",
      difference > 0 ~ "Increase",
      difference < 0 ~ "Decrease",
      TRUE ~ "No Change"
    )
  ) |>
  filter(!is.na(difference))

# Keep the labels in their current (chronological) order on the x axis.
waterfall_data$year <- factor(
  waterfall_data$year,
  levels = unique(waterfall_data$year)
)

# Create the waterfall plot.
# Columns are referenced by bare name inside aes(); `waterfall_data$amount`
# bypasses ggplot2's data masking and triggers a warning.
# Bars whose amount is exactly 0 are drawn at start_amount instead.
waterfall_plot <- ggplot(
  waterfall_data,
  aes(
    x = year,
    y = ifelse(amount == 0, start_amount, amount),
    fill = color_category
  )
) +
  geom_col(
    color = "black",
    # `text` is only consumed by ggplotly() for the tooltip.
    aes(
      text = paste0(
        "Year: ", year, "<br>",
        "Amount: ", round(amount, 2), " tons<br>",
        "Cumulative: ", round(cumulative, 2), " tons"
      )
    )
  ) +
  geom_text(
    aes(
      label = round(amount, 1),
      y = amount + 0.05 * max(amount)
    ),
    vjust = 0,
    size = 3
  ) +
  scale_fill_manual(
    values = c(
      "Increase" = "#006400",
      "Decrease" = "#8B0000",
      "No Change" = "#808080",
      "End Year" = "#4169E1"
    ),
    name = "Change Type"
  ) +
  labs(
    x = "Year",
    y = "Waste Amount (tons)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Warning in geom_col(color = "black", aes(text = paste0("Year: ", year, "<br>",
: Ignoring unknown aesthetics: text
# Interactive version of the waterfall plot; tooltips use the `text` aesthetic.
t23 <- waterfall_plot |>
  ggplotly(width = 1000, height = 1000, tooltip = "text")
Warning: Use of `waterfall_data$amount` is discouraged.
ℹ Use `amount` instead.
# Title with an HTML subtitle for the interactive waterfall chart.
t23_title <- paste0(
  "Comparison of Waste Stored at Year's End and Next Year's Start",
  '<br>',
  '<sup>',
  'Paper/Cardboard Packaging in OSREDNJESLOVENSKA Region',
  '</sup>'
)

# Apply the title and enlarge the top margin so it is not clipped.
t23 <- layout(
  t23,
  title = list(text = t23_title, x = 0.01),
  margin = list(t = 100)
)

t23
# Variant of the waterfall data: here the "<year + 1> Start" rows carry the
# actual opening amount, and the year-to-year difference is kept in its own
# column (zero on the "<year> End" rows).
variant_data <- filter_by_waste |>
  arrange(year) |>
  mutate(
    end_year = paste0(as.numeric(levels(year))[year], " End"),
    start_next_year = paste0(as.numeric(levels(year))[year] + 1, " Start"),
    end_amount = waste_stored_end_year,
    start_amount = lead(waste_stored_start_year),
    difference = lead(waste_stored_start_year) - waste_stored_end_year
  ) |>
  select(end_year, start_next_year, end_amount, start_amount, difference) |>
  tidyr::pivot_longer(
    cols = c(end_year, start_next_year),
    names_to = "type",
    values_to = "year"
  ) |>
  mutate(
    amount = ifelse(type == "end_year", end_amount, start_amount),
    difference = ifelse(type == "start_next_year", difference, 0),
    cumulative = cumsum(amount),
    color_category = case_when(
      type == "end_year" ~ "End Year",
      difference > 0 ~ "Increase",
      difference < 0 ~ "Decrease",
      TRUE ~ "No Change"
    )
  ) |>
  # rows of the final year have no following year to compare against
  filter(!is.na(start_amount))

# Labels keep the trailing blank they have always had, then become a factor
# whose levels follow the order of appearance so the x axis stays chronological.
variant_data <- variant_data |>
  mutate(
    year = paste0(year, " "),
    year = factor(year, levels = unique(year))
  )

# One row per bar: every label gets an "amount" bar and a "difference" bar.
variant_data_long <- variant_data |>
  tidyr::pivot_longer(
    cols = c(amount, difference),
    names_to = "bar_type",
    values_to = "value"
  ) |>
  mutate(
    bar_category = case_when(
      bar_type == "amount" & type == "end_year" ~ "End Year",
      bar_type == "amount" & type != "end_year" ~ "Start Amount",
      bar_type == "difference" & color_category == "Increase" ~ "Increase",
      bar_type == "difference" & color_category == "Decrease" ~ "Decrease",
      TRUE ~ "No Change"
    )
  )

# Row order follows this category order; with position_identity() the bars
# overlap, so later categories are drawn on top of earlier ones.
desired_order <- c("Start Amount", "Increase", "End Year", "Decrease", "No Change")

variant_data_long <- variant_data_long |>
  mutate(bar_category = factor(bar_category, levels = desired_order)) |>
  arrange(bar_category)

# Build the variant plot
variant_plot <- ggplot(
  variant_data_long,
  aes(x = year, y = value, fill = bar_category)
) +
  geom_col(
    position = position_identity(),
    color = "black",
    # `text` is only used by ggplotly() for the tooltip
    aes(text = paste0(
      "Year: ", year, "<br>",
      "Type: ", bar_type, "<br>",
      "Value: ", round(value, 2), " tons<br>"
      # "Cumulative: ", round(cumulative, 2), " tons"
    ))
  ) +
  geom_text(
    aes(
      # zero-valued bars get no label
      label = ifelse(value == 0, NA, round(value, 1)),
      y = value + 0.05 * max(value)
    ),
    position = position_dodge(width = 0.9),
    vjust = -0.5,
    size = 3
  ) +
  scale_fill_manual(
    values = c(
      "End Year" = "#4169E1",
      "Increase" = "#006400", # Increase from Last Year
      "Start Amount" = "#808080",
      "Decrease" = "#8B0000", # Decrease from Last Year
      "No Change" = "#D3D3D3"
    ),
    name = "Type"
  ) +
  labs(x = "Year", y = "Waste Amount (tons)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Warning in geom_col(position = position_identity(), color = "black", aes(text =
paste0("Year: ", : Ignoring unknown aesthetics: text
# Convert the variant plot to plotly for interactivity; the tooltip shows only
# the custom `text` aesthetic.
t24 <- ggplotly(variant_plot, width = 1000, height = 800, tooltip = "text")

# Adjust layout for the variant plot: two-line title, extra top margin.
t24 <- t24 |>
  layout(
    title = list(
      text = paste0(
        "Comparison of Waste Stored at Year's End and Next Year's Start (Variant)",
        "<br>",
        "<sup>",
        "Paper/Cardboard Packaging in OSREDNJESLOVENSKA Region",
        "</sup>"
      ),
      x = 0.01
    ),
    margin = list(t = 100)
  )

# Display the variant widget. This chunk used to print `t23` here, so the
# variant plot it had just built was never rendered.
t24
Rows: 56 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_collected, waste_collected_from_collector_RS, was...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 53 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_received, waste_received_own_waste, waste_receive...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_received, received_from_own_waste_OVD, received_f...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 9
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (7): year, total_waste_received, waste_received_from_producer, waste_rec...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_received, waste_received_own_waste, waste_receive...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 52 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (8): year, total_waste_received, waste_received_own_waste, waste_receive...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 54 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (9): year, total_waste_received, untreated_waste_from_storage_start_year...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# The 2019 table gets a zero-filled `waste_received_own_waste` column and is
# put into a fixed column order, so that the positional rename below assigns
# the right names.
waste.collected.2019.data <- waste.collected.2019.data |>
  mutate(waste_received_own_waste = 0) |>
  select(
    year,
    statistical_region,
    type_of_waste,
    total_waste_received,
    waste_received_own_waste,
    waste_received_from_producer,
    waste_received_from_collector,
    waste_received_from_processing_operator,
    waste_received_from_other_EU_country,
    waste_received_from_other_country_outside_EU
  )

# Harmonise column names across years by position, using 2021 as the template.
# NOTE(review): this relies on every table having its columns in the same
# order as the 2021 table - confirm for 2016-2018 and 2020.
names(waste.collected.2016.data) <- colnames(waste.collected.2021.data)
names(waste.collected.2017.data) <- colnames(waste.collected.2021.data)
names(waste.collected.2018.data) <- colnames(waste.collected.2021.data)
names(waste.collected.2019.data) <- colnames(waste.collected.2021.data)
names(waste.collected.2020.data) <- colnames(waste.collected.2021.data)

#' Add a zero-filled `untreated_waste_from_storage_start_year` column.
#'
#' @param data A data frame of collected-waste records.
#' @return `data`, ungrouped, with `untreated_waste_from_storage_start_year`
#'   set to 0 on every row.
add_untreated_waste <- function(data) {
  # The value is a constant, so no grouping is needed; ungroup() keeps the
  # "returns ungrouped data" behaviour of the earlier group_by()/ungroup() form.
  data |>
    ungroup() |>
    mutate(untreated_waste_from_storage_start_year = 0)
}

waste.collected.2016.data <- add_untreated_waste(waste.collected.2016.data)
waste.collected.2017.data <- add_untreated_waste(waste.collected.2017.data)
waste.collected.2018.data <- add_untreated_waste(waste.collected.2018.data)
waste.collected.2019.data <- add_untreated_waste(waste.collected.2019.data)
waste.collected.2020.data <- add_untreated_waste(waste.collected.2020.data)
waste.collected.2021.data <- add_untreated_waste(waste.collected.2021.data)

# Print the resulting column names for inspection
colnames(waste.collected.2021.data)
#' Keep the harmonised collected-waste columns in a fixed order.
#'
#' @param data A data frame containing all eleven harmonised columns.
#' @return `data` reduced to those columns, in the order listed below.
select_columns <- function(data) {
  harmonised_columns <- c(
    "year",
    "statistical_region",
    "type_of_waste",
    "total_waste_received",
    "untreated_waste_from_storage_start_year",
    "waste_received_own_waste",
    "waste_received_from_producers_RS",
    "waste_received_from_collectors_RS",
    "waste_received_from_processors_RS",
    "waste_received_from_EU",
    "waste_received_from_non_EU"
  )
  select(data, all_of(harmonised_columns))
}

waste.collected.2016.data <- select_columns(waste.collected.2016.data)
waste.collected.2017.data <- select_columns(waste.collected.2017.data)
waste.collected.2018.data <- select_columns(waste.collected.2018.data)
waste.collected.2019.data <- select_columns(waste.collected.2019.data)
waste.collected.2020.data <- select_columns(waste.collected.2020.data)
waste.collected.2021.data <- select_columns(waste.collected.2021.data)

# 2022 is renamed by position to the (now reordered) 2021 column names
names(waste.collected.2022.data) <- colnames(waste.collected.2021.data)

# Stack all years and normalise the waste-type labels
combined_collected_data <- bind_rows(
  waste.collected.2016.data,
  waste.collected.2017.data,
  waste.collected.2018.data,
  waste.collected.2019.data,
  waste.collected.2020.data,
  waste.collected.2021.data,
  waste.collected.2022.data
) |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))

# Export the combined table
write_csv(
  combined_collected_data,
  "Interface/Visualization/data/trt_collected_combined.csv"
)

# Total waste received per year (missing values ignored)
yearly_total <- combined_collected_data |>
  group_by(year) |>
  summarize(total_waste = sum(total_waste_received, na.rm = TRUE))

# Line chart of the yearly totals
plot_yearly <- yearly_total |>
  plot_ly(
    x = ~year,
    y = ~total_waste,
    type = "scatter",
    mode = "lines+markers"
  ) |>
  layout(
    title = "Total Waste Received Over Years",
    xaxis = list(title = "Year"),
    yaxis = list(title = "Total Waste Received")
  )

# Display the plot
plot_yearly
# Summarize total waste received by statistical region, largest first
region_total <- combined_collected_data |>
  group_by(statistical_region) |>
  summarize(total_waste = sum(total_waste_received, na.rm = TRUE)) |>
  arrange(desc(total_waste))

# Bar chart of the regional totals.
# - Rows without a region are dropped explicitly; plotly would otherwise
#   discard them itself with an "Ignoring n observations" warning.
# - The row order of the data does not determine the bar order for a character
#   x axis, so the descending order is requested through `categoryorder`.
plot_region <- region_total |>
  filter(!is.na(statistical_region)) |>
  plot_ly(x = ~statistical_region, y = ~total_waste, type = "bar") |>
  layout(
    title = "Total Waste Received by Statistical Region",
    xaxis = list(
      title = "Statistical Region",
      categoryorder = "total descending"
    ),
    yaxis = list(title = "Total Waste Received")
  )

# Display the plot
plot_region
Warning: Ignoring 1 observations
# Summarize total waste received by type of waste, largest first
waste_type_total <- combined_collected_data |>
  group_by(type_of_waste) |>
  summarize(total_waste = sum(total_waste_received, na.rm = TRUE)) |>
  arrange(desc(total_waste))

# Bar chart of the totals per waste type. The row order of the data does not
# determine the bar order for a character x axis, so the descending order is
# requested through `categoryorder`.
plot_waste_type <- plot_ly(
  waste_type_total,
  x = ~type_of_waste,
  y = ~total_waste,
  type = "bar"
) |>
  layout(
    title = "Total Waste Received by Type of Waste",
    xaxis = list(
      title = "Type of Waste",
      categoryorder = "total descending"
    ),
    yaxis = list(title = "Total Waste Received")
  )

# Display the plot
plot_waste_type
Rows: 54 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, total_waste_given_away
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 52 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, total_waste_given_away
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, total_waste_given_away
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, total_waste_for_processing
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 51 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, waste_entering_treatment_process
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 53 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, waste_entering_treatment_process
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 54 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (2): year, waste_quantity_per_treatment
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Harmonise the treatment tables by position, using 2021 as the template.
# NOTE(review): 2020 is not renamed here; presumably its columns already match
# the 2021 names - confirm.
names(waste.treatment.2016.data) <- colnames(waste.treatment.2021.data)
names(waste.treatment.2017.data) <- colnames(waste.treatment.2021.data)
names(waste.treatment.2018.data) <- colnames(waste.treatment.2021.data)
names(waste.treatment.2019.data) <- colnames(waste.treatment.2021.data)
names(waste.treatment.2022.data) <- colnames(waste.treatment.2021.data)

combined_treatment_data <- bind_rows(
  waste.treatment.2016.data,
  waste.treatment.2017.data,
  waste.treatment.2018.data,
  waste.treatment.2019.data,
  waste.treatment.2020.data,
  waste.treatment.2021.data,
  waste.treatment.2022.data
)

# Rows without a statistical region are labelled "NEOPREDELJENO"
combined_treatment_data <- combined_treatment_data |>
  mutate(
    statistical_region = ifelse(
      is.na(statistical_region),
      "NEOPREDELJENO",
      statistical_region
    )
  )

# Normalise the waste-type labels
combined_treatment_data <- combined_treatment_data |>
  mutate(type_of_waste = clear_waste_name(type_of_waste))

# Export the combined table
write_csv(
  combined_treatment_data,
  "Interface/Visualization/data/trt_treatment_combined.csv"
)

# Aggregate by year and type of waste, leaving out the rows that were labelled
# "NEOPREDELJENO" above. `.groups = "drop"` returns an ungrouped table and
# silences the "summarise() has grouped output" message.
aggregated_data <- combined_treatment_data |>
  filter(statistical_region != "NEOPREDELJENO") |>
  group_by(year, type_of_waste) |>
  summarize(
    total_waste = sum(waste_entering_treatment_process, na.rm = TRUE),
    .groups = "drop"
  )
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
# Line chart of the waste entering treatment per year, one line per waste type.
# `linewidth` replaces the `size` argument, which has been deprecated for lines
# since ggplot2 3.4.0 (`size` still applies to the points).
p <- ggplot(
  aggregated_data,
  aes(x = year, y = total_waste, color = type_of_waste, group = type_of_waste)
) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(
    title = "Waste Treatment Over Time by Waste Type",
    x = "Year",
    y = "Total Waste Entering Treatment Process",
    color = "Type of Waste"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, face = "bold")
  )
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Rows: 36 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_generated_untreated, waste_generated_to_processin...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 37 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_generated, waste_generated_collector_RS, waste_ge...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 37 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, total_waste_generated, delivered_to_collector_RS, delivered_t...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 34 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, type_of_waste
dbl (6): year, waste_recovered_and_given_away, waste_recovered_and_given_awa...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 2 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): input_waste_name, output_waste_name
dbl (5): year, waste_handed_to_collectors_RS, waste_delivered_to_operators_R...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 41 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, input_waste_name, output_waste_name
dbl (5): year, waste_handed_to_collectors_RS, waste_delivered_to_operators_R...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 123 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): name_of_company, statistical_region, name_of_municipality, type_of_...
dbl (2): year, total_waste_received
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 91 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, name_of_municipality, type_of_waste
dbl (2): year, total_waste_received
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 158 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, name_of_municipality, type_of_waste
dbl (2): year, waste_collected_by_municipality
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 164 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): statistical_region, name_of_municipality, type_of_waste
dbl (2): year, waste_collected_by_municipality
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# The 2022 file has no type_of_waste column, so it cannot be compared with the
# previous years; it is read into its own table here.
municipal.waste.received.2022.data <- read_csv(
  file = "2022 data/2022 filtered/treatment/mbt_municipal_origin_22.csv"
)
Rows: 197 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): statistical_region, name_of_municipality
dbl (2): year, mixed_waste_mech_bio_treatment
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Column content of combined_municipal_waste_received_data:
# 1. Year
# 2. Statistical Region
# 3. Name of Municipality
# 4. Type of Waste
# 5. Waste collected by municipality (`waste_collected_by_municipality`)

# Average waste collected per municipality across all years.
# `.groups = "drop"` returns an ungrouped table directly and silences the
# "summarise() has grouped output" message.
waste_by_municipality <- combined_municipal_waste_received_data |>
  group_by(statistical_region, name_of_municipality) |>
  summarise(
    avg_waste = mean(waste_collected_by_municipality, na.rm = TRUE),
    .groups = "drop"
  ) |>
  # strip the blanks from the municipality names
  # (presumably to match the NAME_2 spelling of the map data - confirm)
  mutate(name_of_municipality = str_replace_all(name_of_municipality, " ", ""))
`summarise()` has grouped output by 'statistical_region'. You can override
using the `.groups` argument.
# Read the map geometries (path is relative to the working directory)
slovenia_map <- sf::st_read(dsn = "map.geojson")
Reading layer `map' from data source
`C:\Users\kovac\Desktop\Work\Green UP Project\green-up-project\map.geojson'
using driver `GeoJSON'
Simple feature collection with 192 features and 13 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 13.3821 ymin: 45.4283 xmax: 16.5843 ymax: 46.8782
Geodetic CRS: WGS 84
# Upper-case both name columns of the map so they can be compared with the
# municipality names of the waste data.
slovenia_map <- slovenia_map |>
  mutate(across(c(NAME_1, NAME_2), str_to_upper))

# Attach the per-municipality averages to the map features; features without a
# matching municipality keep NA in the joined columns.
slovenia_map_with_data <- left_join(
  x = slovenia_map,
  y = waste_by_municipality,
  by = c("NAME_2" = "name_of_municipality")
)

# Number of map features for which the join found a statistical region
matched_features <- !is.na(slovenia_map_with_data$statistical_region)
print(sum(matched_features))
[1] 117
# Municipality names from the waste data that have no counterpart among the
# NAME_2 values of the map
print(
  setdiff(
    x = waste_by_municipality$name_of_municipality,
    y = slovenia_map_with_data$NAME_2
  )
)
# Where the join left statistical_region empty, fall back to NAME_1 of the map
slovenia_map_with_data <- slovenia_map_with_data |>
  mutate(statistical_region = coalesce(statistical_region, NAME_1))

# Choropleth of the average waste per municipality. The `text` aesthetic is
# not used by ggplot2 itself; it carries the tooltip for ggplotly().
static_map <- ggplot(data = slovenia_map_with_data) +
  geom_sf(
    aes(
      fill = avg_waste,
      text = paste(
        "Municipality:", NAME_2, "<br>",
        ifelse(
          is.na(avg_waste),
          "No data available",
          paste("Average Waste:", round(avg_waste, 2))
        )
      )
    )
  ) +
  scale_fill_viridis(
    option = "plasma",
    name = "Average Waste\n(2018-2021)",
    labels = scales::comma,
    na.value = "grey80" # Color for municipalities with no data
  ) +
  theme_minimal() +
  labs(title = "Average Municipal Waste Collected in Slovenia (2018-2021)") +
  theme(
    plot.title = element_text(size = 16, face = "bold"),
    plot.subtitle = element_text(size = 12),
    legend.position = "right",
    # coordinates carry no information for the reader, so hide them
    axis.text = element_blank(),
    axis.ticks = element_blank()
  )
Warning in layer_sf(geom = GeomSf, data = data, mapping = mapping, stat = stat,
: Ignoring unknown aesthetics: text
# Interactive version of the map: tooltip taken from the `text` aesthetic,
# shown on a white hover label in 12pt Arial.
interactive_map <- static_map |>
  ggplotly(tooltip = "text", width = 1000, height = 800) |>
  layout(
    hoverlabel = list(
      bgcolor = "white",
      font = list(family = "Arial", size = 12)
    )
  )

# Display the interactive map
interactive_map
# Trend analysis by municipality: total waste collected per year, one line per
# municipality. `.groups = "drop"` hands an ungrouped table to ggplot and
# silences the "summarise() has grouped output" message.
trend_by_municipality <- combined_municipal_waste_received_data |>
  group_by(year, name_of_municipality) |>
  summarise(
    total_waste = sum(waste_collected_by_municipality, na.rm = TRUE),
    .groups = "drop"
  ) |>
  ggplot(aes(x = year, y = total_waste, color = name_of_municipality)) +
  geom_line() +
  labs(
    title = "Waste Collected Over Time by Municipality",
    x = "Year",
    y = "Waste Collected"
  )
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
# Regional comparison: total waste collected per region, largest first
region_comparison <- combined_municipal_waste_received_data |>
  group_by(statistical_region) |>
  summarise(total_waste = sum(waste_collected_by_municipality, na.rm = TRUE)) |>
  ggplot(aes(
    x = reorder(statistical_region, -total_waste),
    y = total_waste,
    fill = statistical_region
  )) +
  geom_col() +
  labs(
    title = "Total Waste Collected by Region",
    x = "Statistical Region",
    y = "Waste Collected"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Distinct waste types present in the data:
# Paper/Cardboard Packaging
# Other Wood Waste
# Wooden packaging
distinct_waste_types <- combined_municipal_waste_received_data |>
  distinct(type_of_waste)

# One quantity column per waste type (0 where the row is of another type).
# Labels are compared case-insensitively: the list above spells the type
# "Wooden packaging", whereas an exact match on "Wooden Packaging" would leave
# the wooden_packaging column at 0 for such rows.
combined_municipal_waste_received_data <-
  combined_municipal_waste_received_data |>
  mutate(
    paper_cardboard_packaging = ifelse(
      str_to_lower(type_of_waste) == "paper/cardboard packaging",
      waste_collected_by_municipality,
      0
    ),
    other_wood_waste = ifelse(
      str_to_lower(type_of_waste) == "other wood waste",
      waste_collected_by_municipality,
      0
    ),
    wooden_packaging = ifelse(
      str_to_lower(type_of_waste) == "wooden packaging",
      waste_collected_by_municipality,
      0
    )
  )

# Relationship between paper/cardboard packaging and other wood waste, using
# the per-municipality totals; a linear fit is overlaid. The formula is given
# explicitly (it is the default for method = "lm") to avoid the geom_smooth
# message.
correlation_analysis <- combined_municipal_waste_received_data |>
  group_by(name_of_municipality) |>
  summarise(
    paper_cardboard_packaging = sum(paper_cardboard_packaging, na.rm = TRUE),
    other_wood_waste = sum(other_wood_waste, na.rm = TRUE),
    wooden_packaging = sum(wooden_packaging, na.rm = TRUE)
  ) |>
  ggplot(aes(x = paper_cardboard_packaging, y = other_wood_waste)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  labs(
    title = "Correlation between Paper/Cardboard Packaging and Other Wood Waste",
    x = "Paper/Cardboard Packaging",
    y = "Other Wood Waste"
  )

# Only the municipality trend is rendered here; region_comparison and
# correlation_analysis are built but not displayed in this chunk.
ggplotly(trend_by_municipality, width = 1000, height = 800)
Rows: 5 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): name_of_company, type_of_waste
dbl (2): year, lost_mass
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 6 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): name_of_company, type_of_waste
dbl (2): year, lost_mass
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 4 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): input_waste_name
dbl (2): year, mass_change
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 3 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): input_waste_name
dbl (2): year, mass_change
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 7 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): type_of_waste, treatment_operation
dbl (2): year, reduction_or_increase_of_mass_during_treatment
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
`summarise()` has grouped output by 'year', 'type_of_waste'. You can override
using the `.groups` argument.
# Descriptive summary of every column of the aggregated data
aggregated_data |> skim()
Data summary
Name
aggregated_data
Number of rows
19
Number of columns
4
_______________________
Column type frequency:
character
2
numeric
2
________________________
Group variables
None
Variable type: character
skim_variable
n_missing
complete_rate
min
max
empty
n_unique
whitespace
type_of_waste
0
1.00
4
25
0
5
0
treatment_operation
12
0.37
4
10
0
4
0
Variable type: numeric
skim_variable
n_missing
complete_rate
mean
sd
p0
p25
p50
p75
p100
hist
year
0
1
2020.53
1.43
2018.00
2019.50
2021.00
2022.00
2022.00
▂▃▅▃▇
total_mass_change
0
1
55.28
238.94
-494.74
0.09
18.67
65.89
880.15
▁▇▆▁▁
# Mass change per year as dodged bars coloured by waste type, with one panel
# per treatment operation.
p <- ggplot(
  aggregated_data,
  aes(x = year, y = total_mass_change, fill = type_of_waste)
) +
  geom_col(position = "dodge") +
  facet_wrap(vars(treatment_operation)) +
  theme_minimal() +
  labs(
    title = "Mass Change During Treatment Over the Years by Waste Type and Operation",
    x = "Year",
    y = "Total Mass Change"
  )

p |> ggplotly(width = 1000, height = 800)
# Mass change per year as dodged bars coloured by treatment operation, with
# one panel per waste type.
p <- ggplot(
  aggregated_data,
  aes(x = year, y = total_mass_change, fill = treatment_operation)
) +
  geom_col(position = "dodge") +
  facet_wrap(vars(type_of_waste)) +
  theme_minimal() +
  labs(
    title = "Mass Change During Treatment Over the Years by Waste Type and Operation",
    x = "Year",
    y = "Total Mass Change"
  )

p |> ggplotly(width = 1000, height = 800)
# Mass change per year in a single panel: bar fill encodes the waste type and
# the bar outline colour encodes the treatment operation.
p <- ggplot(
  aggregated_data,
  aes(
    x = year,
    y = total_mass_change,
    fill = type_of_waste,
    color = treatment_operation
  )
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Mass Change During Treatment Over the Years by Waste Type and Operation",
    x = "Year",
    y = "Total Mass Change"
  )

p |> ggplotly(width = 1000, height = 800)
Rows: 19 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): name_of_company, landfill, start_date, end_date
dbl (7): year, filled_volume_start_year, filled_area_start_year, free_volume...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 2 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): name_of_company, landfill, type_of_waste
dbl (2): total_non_hazardous_waste_received, total_deposited
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 1 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): name_of_company, landfill, type_of_waste
dbl (3): year, total_non_hazardous_waste_received, disposed
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.